#!/bin/bash

# Print the command-line synopsis and an example INDEX_URL to stdout.
# The script name shown is the basename of $0.
usage() {
  local script_name=${0##*/}
  printf '%s\n' \
    "usage: ${script_name} INDEX_URL" \
    '  where "INDEX_URL" is like the following:' \
    '    http://detail.zol.com.cn/106/105361/pic.shtml' \
    'Last update: 2007-9-12 13:48:16'
}
# Write a diagnostic message to stderr.
# Arguments: the message words; they are joined with spaces and backslash
#            escapes such as \n are interpreted (echo -e).
msg() {
  >&2 echo -e "$@"
}
# Report a fatal error on stderr and terminate the script.
# Arguments: the message words; they are joined with spaces into a single
#            string, prefixed with "ERROR: ", and backslash escapes such as
#            \n are interpreted (echo -e).
# Exits:     always, with status 255.
error() {
  # "$*" rather than "$@": inside a larger quoted string "$@" would split the
  # message into several separate words.
  echo -e "ERROR: $*" 1>&2
  # Exit statuses are limited to 0-255; 255 is what bash reports for the
  # out-of-range "exit -1", written here explicitly.
  exit 255
}
# Run a command, logging it before execution and its outcome afterwards.
# Arguments: the command and its arguments, executed exactly as given ("$@").
# The command runs in the current shell, so builtins such as `cd` affect the
# caller. On success a confirmation is logged through msg; on failure the
# command line is passed to error.
do_cmd() {
  # "$*" joins the command line into ONE message argument; "$@" inside a
  # quoted string would hand msg/error a separate argument per word.
  msg "executing:\n$*"
  if "$@"; then
    msg "command success"
  else
    error "command failed:\n$*"
  fi
}
# Take the gallery index URL from the first argument; when none is given,
# fall back to whatever the external `getclip` command prints (presumably the
# clipboard contents -- confirm on the target system).
if [[ -n "$1" ]]; then
  init_url="$1"
else
  init_url=$(getclip)
fi
# Without a URL there is nothing to download: show the usage text and stop
# instead of running wget with an empty argument.
if [[ -z "$init_url" ]]; then
  usage 1>&2
  error "No INDEX_URL given and nothing could be read from the clipboard."
fi

# Site root (scheme://host), handed to wget -B below as the base for the
# site-relative gallery links. The scheme of INDEX_URL is kept when it has
# one; a URL without a scheme defaults to http.
scheme_re='^([A-Za-z][A-Za-z0-9+.-]*)://'
base_url=${init_url#*//}
base_url=${base_url%%/*}
if [[ "$init_url" =~ $scheme_re ]]; then
  base_url="${BASH_REMATCH[1]}://${base_url}"
else
  base_url="http://${base_url}"
fi
index_page=index.html
page_dir=pages
img_dir=images

# Fetch the index page, e.g. http://detail.zol.com.cn/106/105361/pic.shtml
do_cmd wget -O "$index_page" "$init_url"
# Build the working-directory name from the page <title>: drop the tags
# (case-insensitively, matching the grep), squeeze every run of punctuation
# and whitespace into a single "_", and trim trailing underscores.
page_title=$(grep -ihoE '<title>.*</title>' "$index_page" |
             sed -e 's/<\/\?title>//gI' |
             tr -s '[:punct:][:space:]' _ | sed -e 's/_*$//')
if [[ -z "$page_title" ]]; then error "Cannot find page title."; fi
msg "Creating working directory: $page_title"
do_cmd mkdir "$page_title"
do_cmd mv "$index_page" "$page_title"
do_cmd cd "$page_title"
# Download every gallery page linked from the index into $page_dir. The links
# are site-relative; a resolved example looks like
# http://detail.zol.com.cn/picture_index_111/index1109709.shtml
# Duplicates are removed so that each page is requested only once.
grep -ihoE '/picture_index_[0-9]+/index[0-9]+\.s?html' "$index_page" |
  sort -u |
  wget -i- -B "$base_url" -P "$page_dir"
# Collect the image URLs found in the downloaded pages and fetch each one once
# into $img_dir, e.g. http://img02.zol.com.cn/product/11/709/ce4Pkyxy5VW5E.jpg
grep -ihoE 'http://img[^"[:space:]]+zol\.com\.cn/product/[[:alnum:]/]+\.jpe?g' "$page_dir"/* |
  sort -u |
  wget -i- -P "$img_dir"
